/* Recovers beliefs from GSU gender and competitiveness experiments */

* log file
capture log close _all
log using "Recover GSU Gender and Competitiveness Beliefs.log", replace name(recover)

* configure Stata
capture: version 18
set processors 4
set more off
set scheme s1color
set seed 987654321
timer clear 1
timer on 1

* graphics font
graph set window fontface "Candara"

* tell us what version ran
about

* global for demographics
global demog ""

* do all the files in the dta_dir? (yes or no)
global do_all "yes"

* do only the dta_file? if so, next list the name of the file, to get it done (yes or no)
global do_specific "yes"

* name of specific input and output file, only relevant if $do_specific is "yes"
global use_specific   "gc_s1_q1"
global save_specific  "gc_s1_q1_b"

* erase recovery_room files at the beginning and end?
global clean_room "yes"

* create directories needed
capture: mkdir figures
capture: mkdir recovery_room
capture: mkdir recovered

* clean up
if "$clean_room" == "yes" {
	cd recovery_room
	local dfiles : dir "$dta_dir" files "*dta"
	foreach file of local dfiles {
		erase "`file'"
	}
	cd ..
}

* load the code to recover beliefs
qui: do belief_recovery

* quiet, to speed up
qui {

* decide which risk model to use (eut or rdu)
local risk_model "eut"

* collate the BHM estimates with beliefs
use bhm_`risk_model'2022_posterior, clear
summ _loglikelihood if id==1
local mcmc = r(N)
compress
save tmp_bhm, replace
noi: di "Saved posterior with risk preferences ..."

* restart with main data containing the beliefs data
use GC_beliefs, clear

* keep beliefs
keep if Task == "5. Piece Rate Rank?" | Task == "6. Tournament Rank?"

* qid already exists, with a different name
rename stimulus_id qid
compress qid

* identify the questions
generate int question=.
replace question=1 if qid=="tournament_5"
replace question=2 if qid=="tournament_6"

* id to match BHM
egen int id = group(uid)
tab id
drop sid
egen int sid = group(id)
summ sid
local Nsub = r(max)

* choiceI exists
des choiceI
tab choiceI

* bbin exists
label define bin4 1 "Best" 2 "Second Best" 3 "Third Best" 4 "Fourth Best" 
label values bbin bin4
sort question bbin
list question qid bbin choiceI if sid==1, sepby(question)
keep if bbin<=4

keep id sid question qid choiceI bbin alpha beta female

* reshape wide
reshape wide choiceI, i(id question) j(bbin)
forvalues x=1/4 {
	generate int r_`x' = choiceI`x'
}
generate int nbin = 4
generate int ntokens = 100

* QSR parameters specified as a global
foreach x in nbin ntokens alpha beta {
	summ `x'
	global `x' = r(mean)
}
di $nbin

label define flab 0 "Male" 1 "Female"
label values female flab

* save data, only using gender demographic
keep id sid question qid r_* female
compress
sort id question
list in 1/30, sepby(id) noobs
save tmp_id, replace
noi: di "Saved beliefs data, and now generating recovery files for each subject ..."

* generate for each subject
tab question if sid==1
local nq = r(r)
forvalues s=1/`Nsub' {
	forvalues q=1/`nq' {
		use tmp_id, clear
		count if question==`q' & sid==`s'
		local count = r(N)
		if `count'>0 {
			keep if question==`q' & sid==`s'
			sum id if sid==`s'
			local id = r(mean)
			save tmp_sid_b, replace
			use tmp_bhm, clear
			drop _logposterior
			keep if id == `id'
			capture: generate eta = .
			capture: generate phi = .
			capture: generate mu = .
			generate int sid = `s'
			generate int question = `q'
			sort sid question
			merge m:1 sid question using tmp_sid_b
			drop _merge
			generate long mcmc = `mcmc'
			compress
			save recovery_room/gc_s`s'_q`q', replace
			des
			tab qid
			summ
		}
	}
}


* recover beliefs
timer on 2

* CRRA, CARA, Power, and Expo-Power utility functions supported, the last one standing is the one used
global ufunc "cara"
global ufunc "expo"
global ufunc "power"
global ufunc "crra"

* Four probability weighting functions are supported, the last one standing in the next few lines is the one used if RDU is used
global pfunc "eut"
global pfunc "power"
global pfunc "inverse_s"
global pfunc "prelec2"

if "`risk_model'" == "eut" {
	global pfunc "eut"
}
if "`risk_model'" == "rdu" {
	global pfunc "prelec2"
}

* controls the names of the parameters for the utility function
local upars "r"
if "$ufunc" == "expo" {
	local upars "r alpha"
}

* control the names of the parameters for the probability weighting function
local ppars ""
if "$pfunc" == "power" | "$pfunc" == "inverse_s" {
	local ppars "gamma"
}
if "$pfunc" == "prelec2" {
	local ppars "phi eta"
}

* do the belief recovery
noi: di "Recovering beliefs ..."

	if "$do_all" == "yes" {

		* subdirectory with data files
		cd recovery_room

		* use extended functions to get all the files in some data folder
		local dfiles : dir "$dta_dir" files "*dta"
		foreach file of local dfiles {

			* load the file
			use "`file'", clear

			* belief recovery
			beliefs_recovery `upars' `ppars'

			* save the output file in the current directory
			cd ..
			save "recovered/`file'", replace
			cd recovery_room

		}
		cd ..
	}

	if "$do_specific" == "yes" {

		* subdirectory with data files
		cd recovery_room

		use "$use_specific", clear

		* belief recovery
		beliefs_recovery `upars' `ppars'
		summ
		cd ..
		save "recovered/$save_specific", replace

	}


* clean up
if "$clean_room" == "yes" {
	noi: di "Cleaning up the recovery_room files ..."
	cd recovery_room
	local dfiles : dir "$dta_dir" files "*dta"
	foreach file of local dfiles {
		erase "`file'"
	}
	cd ..
}
capture: erase tmp.dta
capture: erase tmp_bhm.dta
capture: erase tmp_sid.dta
capture: erase tmp_id.dta
capture: erase tmp_sid_b.dta

* end of quietly
}

* time taken overall
timer off 2
timer list
local secs = r(t2)
local mins = `secs'/60
local hrs = `mins'/60
local secs_ = string(`secs', "%10.0f")
local mins_ = string(`mins', "%4.1f")
local hrs_ = string(`hrs', "%4.2f")
di "Belief recovery calculations by themselves took `secs_' seconds, `mins_' minutes, or `hrs_' hours."

* time taken overall
timer off 1
timer list
local secs = r(t1)
local mins = `secs'/60
local hrs = `mins'/60
local secs_ = string(`secs', "%10.0f")
local mins_ = string(`mins', "%4.1f")
local hrs_ = string(`hrs', "%4.2f")
di "Complete calculations took `secs_' seconds, `mins_' minutes, or `hrs_' hours."

log close recover
